package Q17_26_Sparse_Similarity;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map.Entry;
import CtCILibrary.AssortedMethods;
/**
 * Brute-force solution to the sparse similarity problem: every pair of
 * documents is compared directly and the pairs with a strictly positive
 * similarity are collected.
 *
 * <p>Similarity is computed as (number of shared words) divided by
 * (size of doc1 + size of doc2 - number of shared words).
 */
public class QuestionA {
    /**
     * Computes the similarity of every pair of documents held in the map.
     * Only the map's values are used; the keys are ignored.
     *
     * @param documents the documents to compare
     * @return a map from each document pair with similarity &gt; 0 to that similarity
     */
    public static HashMap<DocPair, Double> computeSimilarities(HashMap<Integer, Document> documents) {
        ArrayList<Document> docs = new ArrayList<Document>(documents.values());
        return computeSimilarities(docs);
    }

    /**
     * Computes the similarity of every unordered pair of documents in the list.
     * Pairs whose similarity is not strictly positive are left out of the result.
     *
     * @param documents the documents to compare
     * @return a map from each document pair with similarity &gt; 0 to that similarity
     */
    public static HashMap<DocPair, Double> computeSimilarities(ArrayList<Document> documents) {
        HashMap<DocPair, Double> similarities = new HashMap<DocPair, Double>();
        int count = documents.size();
        for (int i = 0; i < count; i++) {
            Document doc1 = documents.get(i);
            // Start at i + 1 so each unordered pair is visited exactly once.
            for (int j = i + 1; j < count; j++) {
                Document doc2 = documents.get(j);
                double sim = computeSimilarity(doc1, doc2);
                if (sim > 0) {
                    // NOTE(review): if two pairs compare equal as map keys, the later
                    // one overwrites the earlier; depends on DocPair's equals/hashCode.
                    similarities.put(new DocPair(doc1.getId(), doc2.getId()), sim);
                }
            }
        }
        return similarities;
    }

    /**
     * Computes the similarity of two documents as
     * {@code intersection / (doc1.size() + doc2.size() - intersection)}.
     *
     * <p>NOTE(review): this equals the Jaccard index only if
     * {@code Document.size()} reports the number of words and each document
     * lists a word at most once; confirm against {@code Document}.
     *
     * @param doc1 the first document
     * @param doc2 the second document
     * @return the similarity, or 0 when the combined size is 0 (for example
     *         when both documents are empty)
     */
    public static double computeSimilarity(Document doc1, Document doc2) {
        HashSet<Integer> set1 = new HashSet<Integer>(doc1.getWords());

        int intersection = 0;
        for (int word : doc2.getWords()) {
            if (set1.contains(word)) {
                intersection++;
            }
        }

        double union = doc1.size() + doc2.size() - intersection;
        if (union == 0) {
            // Avoid 0 / 0.0, which would evaluate to NaN.
            return 0;
        }
        return intersection / union;
    }

    /**
     * Returns the distinct values of the given array.
     *
     * @param array the values to de-duplicate
     * @return a new list holding each distinct value once, in the iteration
     *         order of a {@link HashSet}
     */
    public static ArrayList<Integer> removeDups(int[] array) {
        HashSet<Integer> set = new HashSet<Integer>();
        for (int a : array) {
            set.add(a);
        }
        return new ArrayList<Integer>(set);
    }

    /**
     * Demo driver: builds a handful of small random documents, prints each
     * one, then prints the similarities between them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int numDocuments = 10;
        int docSize = 5;
        HashMap<Integer, Document> documents = new HashMap<Integer, Document>();
        for (int i = 0; i < numDocuments; i++) {
            int[] words = AssortedMethods.randomArray(docSize, 0, 10);
            // De-duplicate so that each document lists a word at most once.
            ArrayList<Integer> w = removeDups(words);
            System.out.println(i + ": " + w.toString());
            Document doc = new Document(i, w);
            documents.put(i, doc);
        }

        HashMap<DocPair, Double> similarities = computeSimilarities(documents);
        Tester.printSim(similarities);
    }
}